# -*- python -*-
import re

from calibre.web.feeds.news import BasicNewsRecipe

class Nrk(BasicNewsRecipe):
    """calibre news recipe for nrk.no (description: 'Norske nyheter').

    The class is almost entirely declarative: it sets recipe options
    (feeds, tag filters, CSS) as class attributes and overrides three
    hooks to fix image URLs, supply a cover and unwrap ``<noscript>``
    blocks.  See calibre's ``BasicNewsRecipe`` documentation for the exact
    semantics of each option.
    """

    # --- Recipe metadata and download limits -----------------------------
    title = u'nrk.no'
    __author__ = 'sb'
    description = 'Norske nyheter'
    publication_type = 'newspaper'
    language = 'no'
    oldest_article = 1
    max_articles_per_feed = 10
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}

    # --- Page clean-up ----------------------------------------------------
    # Elements to keep from each downloaded page.
    keep_only_tags = [
        dict(name='article', attrs={'role': 'main'}),
        dict(name='time', attrs={'class': 'relative'}),
        dict(name='div', attrs={'class': 'bulletin-text text-body'}),
    ]

    # Elements to drop from each downloaded page.
    remove_tags = [
        dict(name='div', attrs={'class': 'bulletin-share-button-bar'}),
        dict(name='div', attrs={'class': 'cf flow-content meta-widget lp_flow relations'}),
        dict(name='div', attrs={'class': 'cf flow-content meta-widget lp_universe relations'}),
    ]

    # Matches an opening or closing <noscript> tag (the tag only, not its
    # contents).  Used by preprocess_raw_html().
    noscriptStartAndEndTag = re.compile(r'</?noscript>')

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        (u'Forsida', u'http://www.nrk.no/toppsaker.rss'),
        (u'NRK Nyheter', u'http://www.nrk.no/nyheiter/siste.rss'),
        (u'Norge', u'http://www.nrk.no/norge/toppsaker.rss'),
        (u'NRK Østlandssendingen', u'http://www.nrk.no/ostlandssendingen/siste.rss'),
        (u'NRK Nordland', u'http://www.nrk.no/nordland/siste.rss'),
        (u'Verden', u'http://www.nrk.no/verden/toppsaker.rss'),
        (u'Sport', u'http://www.nrksport.no/siste.rss'),
        (u'Kultur', u'http://www.nrk.no/kultur/toppsaker.rss'),
        (u'Livsstil', u'http://www.nrk.no/livsstil/toppsaker.rss'),
        (u'Viten', u'http://www.nrk.no/viten/toppsaker.rss'),
        (u'Fordyping', u'http://nrk.no/fordypning/toppsaker.rss'),
        (u'Ytring', u'http://nrk.no/ytring/toppsaker.rss'),
        (u'Musikknyheter fra P3', u'http://p3.no/musikk/feed/'),
    ]

    # --- Styling ----------------------------------------------------------
    # One single-line CSS string; every fragment ends with a space so the
    # rules stay separated once the adjacent literals are concatenated.
    extra_css = (
        'body.nas .intro-element-article p, body.nas .article p { line-height: 1.5; } '
        '.img-center p, .img-left p, .img-right p, .img-center address, .img-left address, .img-right address, .article .img-center p, .article .img-left p, .article .img-right p, .article .img-center address, .article .img-left address, .article .img-right address, .video p { background-color: #E8E8E8; color: #000000; font-size: 10px; font-weight: 400; margin: 0px; padding: 4px; } '
        'ul.byline li address span, ul.byline li address a { color: #777777; display: block; font-style: normal; font-weight: 700; line-height: 12px; text-transform: uppercase; } '
        '.image figcaption { background-color: #E5E5E5; color: #333333; font-size: 0.8em; font-weight: 400; line-height: 1.4; padding: 0px 9px; } '
        '.image figcaption p { font-family: "Helvetica Neue", "Arial", sans-serif; margin: 7px 0px; color: #333333; font-size: 0.8em; font-weight: 400; line-height: 1.4; } '
        '.image div { background-color: #E5E5E5; color: #333333; font-size: 0.8em; font-weight: 400; line-height: 1.4; padding: 0px 9px; } '
        '.image div p { font-family: "Helvetica Neue", "Arial", sans-serif; margin: 7px 0px; color: #333333; font-size: 0.8em; font-weight: 400; line-height: 1.4; } '
        '* { background-color: transparent; font-family: "Verdana", "Arial", "Helvetica", sans-serif; margin: 0px; padding: 0px; text-align: left; } '
        '.avatar img { height: 64px; width: 64px; } '
        '.bulletin-title { margin: 13px 16px 0px; font-size: 14px; font-weight: 700; text-transform: uppercase; } '
        '.bulletin-text p { font-size: 14px; line-height: 1.36em; } '
    )

    def image_url_processor(self, baseurl, url):
        """Return ``url`` with the doubled slash after the gfx.nrk.no host collapsed.

        ``baseurl`` is accepted for interface compatibility and is not used.
        URLs that do not contain ``http://gfx.nrk.no//`` are returned
        unchanged.
        """
        return url.replace('http://gfx.nrk.no//', 'http://gfx.nrk.no/')

    def get_cover_url(self):
        """Return the cover image location, a fixed local ``file://`` URL.

        NOTE(review): nothing in this recipe creates /tmp/nrk.jpg, so the
        file presumably has to be put there by some external step before
        the recipe runs -- confirm.
        """
        return 'file:///tmp/nrk.jpg'

    def preprocess_raw_html(self, raw_html, url):
        """Return ``raw_html`` with all ``<noscript>``/``</noscript>`` tags removed.

        Only the tags themselves are deleted, not what they enclose: the
        tag is stripped here before it would be stripped together with its
        children, which is a usable ``<img>`` tag.  ``url`` is not used.
        """
        return self.noscriptStartAndEndTag.sub('', raw_html)
